import numpy as np
import pandas as pd
import plotly.graph_objects as go
Załadowanie zbioru danych:
# Load the laureates table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='complete.csv')
# Preview the first five rows (the default size of head()).
df.head(n=5)
| | awardYear | category | categoryFullName | sortOrder | portion | prizeAmount | prizeAmountAdjusted | dateAwarded | prizeStatus | motivation | ... | org_founded_country | org_founded_countryNow | org_founded_locationString | ind_or_org | residence_1 | residence_2 | affiliation_1 | affiliation_2 | affiliation_3 | affiliation_4 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2001 | Economic Sciences | The Sveriges Riksbank Prize in Economic Scienc... | 2 | 1/3 | 10000000 | 12295082 | 2001-10-10 | received | for their analyses of markets with asymmetric ... | ... | NaN | NaN | NaN | Individual | NaN | NaN | Stanford University, Stanford, CA, USA | NaN | NaN | NaN |
| 1 | 1975 | Physics | The Nobel Prize in Physics | 1 | 1/3 | 630000 | 3404179 | 1975-10-17 | received | for the discovery of the connection between co... | ... | NaN | NaN | NaN | Individual | NaN | NaN | Niels Bohr Institute, Copenhagen, Denmark | NaN | NaN | NaN |
| 2 | 2004 | Chemistry | The Nobel Prize in Chemistry | 1 | 1/3 | 10000000 | 11762861 | 2004-10-06 | received | for the discovery of ubiquitin-mediated protei... | ... | NaN | NaN | NaN | Individual | NaN | NaN | Technion - Israel Institute of Technology, Hai... | NaN | NaN | NaN |
| 3 | 1982 | Chemistry | The Nobel Prize in Chemistry | 1 | 1 | 1150000 | 3102518 | 1982-10-18 | received | for his development of crystallographic electr... | ... | NaN | NaN | NaN | Individual | NaN | NaN | MRC Laboratory of Molecular Biology, Cambridge... | NaN | NaN | NaN |
| 4 | 1979 | Physics | The Nobel Prize in Physics | 2 | 1/3 | 800000 | 2988048 | 1979-10-15 | received | for their contributions to the theory of the u... | ... | NaN | NaN | NaN | Individual | NaN | NaN | International Centre for Theoretical Physics, ... | Imperial College, London, United Kingdom | NaN | NaN |
5 rows × 52 columns
Stworzenie kolumny zawierającej wiek laureatów w momencie odebrania nagrody (dla uproszczenia liczony rocznikowo):
# Keep only rows with a known birth date; a missing value cannot yield an age.
# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignments below do not write into a slice of another frame
# (the pattern that triggers pandas' SettingWithCopyWarning).
df = df[df['birth_date'].notna()].copy()
# The birth year is the text before the first '-' in birth_date.
df['birth_year'] = df['birth_date'].str.split('-').str[0].astype(int)
# Age is computed from calendar years only: award year minus birth year.
df['age'] = df['awardYear'] - df['birth_year']
Funkcja dzieląca zbiór danych na podzbiory pogrupowane po kategoriach (plus dane dla wszystkich ogółem):
def split_data_by_category(data, categories):
    """Split the 'age' column of *data* into per-category series.

    Args:
        data: DataFrame with 'age' and 'category' columns.
        categories: Iterable of category values to extract, in output order.

    Returns:
        A list whose first element is the full 'age' column, followed by one
        'age' series per entry of *categories* (rows whose 'category' equals
        that entry). A category with no matching rows yields an empty series.
    """
    ages = data['age']
    category_column = data['category']
    per_category = []
    for category in categories:
        per_category.append(ages[category_column == category])
    return [ages, *per_category]
Generowanie wykresu:
# Compute the category list once so that trace names, trace data and every
# button payload share the same order.
categories = df['category'].unique()
df_splitted = split_data_by_category(df, categories)
# split_data_by_category puts the all-laureates series first, so the name list
# needs a leading 'All' to stay aligned with the data. Zipping the data against
# the bare category names would label every box with the next category's name
# and leave the last category out.
trace_names = ['All', *categories]

# Base figure: one box per entry of df_splitted.
fig = go.Figure()
fig.update_layout(title="Age of Nobel Laureates")
for y, name in zip(df_splitted, trace_names):
    fig.add_trace(go.Box(y=y, name=name))

# Buttons for switching between all years, 1969 onwards, and before 1969.
# Each entry is (button label, figure title, rows to plot). The full category
# list is always passed, so every button supplies one series per trace even
# when a subset has no rows for some category.
year_views = [
    ("All years", "Age of Nobel Laureates", df),
    ("Since 1969", "Age of Nobel Laureates since 1969",
     df[df['awardYear'] >= 1969]),
    ("Before 1969", "Age of Nobel Laureates before 1969",
     df[df['awardYear'] < 1969]),
]
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=[
                dict(
                    label=label,
                    method="update",
                    args=[
                        {"y": split_data_by_category(subset, categories)},
                        {"title": title},
                    ],
                )
                for label, title, subset in year_views
            ],
        )
    ]
)